#! /usr/bin/env R
#
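# Code to regress daily FX log changes on pre-computed PCs and export
# the systematic (fitted) and idiosyncratic (residual) components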
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# Stamp the log with the wall-clock time at which this run started.
run_started_at <- format(Sys.time(), "%a %b %d %X %Y")
message("Log file for code executed at\n")
message(run_started_at)
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
library(magrittr)
library(glue);
library(lubridate)
library(stringr)
library(haven);
library(RcppRoll)
library(data.table);
library(statar)
library(zoo)
library(ggplot2)

library(tidyr)
library(dplyr)
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# --- 1. Import daily FX quotes and attach ISO country codes to currencies
dt_fx <- fread("../task_data/output/FX_daily_long.csv")
dt_fx <- dt_fx[, .(
  date = ymd(date),
  ticker,
  base,
  foreign,
  prc = as.numeric(value)
)]

dt_ccodes <- data.table(read_dta("./input/country-codes.dta"))

# Map each currency code (ISO4217) to a country code (ISO3166), once for the
# base leg and once for the foreign leg. all.x keeps quotes whose currency
# has no match in the lookup (their country column is left NA).
# NOTE(review): assumes every ISO4217 code appears at most once in dt_ccodes;
# otherwise these merges would duplicate FX rows -- TODO confirm.
ccodes_base <- dt_ccodes[, .(base = ISO4217, base_country = ISO3166)]
ccodes_foreign <- dt_ccodes[, .(foreign = ISO4217, foreign_country = ISO3166)]
dt_fx <- merge(dt_fx, ccodes_base, by = "base", all.x = TRUE)
dt_fx <- merge(dt_fx, ccodes_foreign, by = "foreign", all.x = TRUE)

# Manual country assignments for three currencies, applied to both legs
# (they overwrite whatever the lookup produced for these codes).
iso_fix <- c(TWD = "TWN", HKD = "HKG", EUR = "EUR")
for (ccy_iter in names(iso_fix)) {
  dt_fx[base == ccy_iter, base_country := iso_fix[[ccy_iter]]]
  dt_fx[foreign == ccy_iter, foreign_country := iso_fix[[ccy_iter]]]
}

# Drop weekend quotes (present on some tickers): keep weekday codes 2..6,
# i.e. Monday to Friday when Sunday is coded 1.
dt_fx <- dt_fx[wday(date) %in% 2:6, ]
# Display only: the result is not assigned, so dt_fx still contains the rows
# with a missing price.
dt_fx[!is.na(prc)]
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# --- 2. Constructing base factors (as in Lustig/Richmond (2018))
setorder(dt_fx, ticker, date)
dt_fx[, `:=`(datey = year(date), log_prc = log(prc))]
# Log price change relative to the observation one time unit earlier, within
# each ticker.
# NOTE(review): tlag() lags by the value of `time` (date - 1), not by row, so
# with weekends removed the Monday changes are presumably NA -- confirm this
# is intended.
dt_fx[, d1_prc := log_prc - tlag(log_prc, n = 1L, time = date), by = .(ticker)]

# Flag every pair in which either leg is one of these pre-euro currencies.
euro_legacy <- c("ATS", "BEF", "FIM", "FRF", "DEM", "GRD",
                 "IEP", "ITL", "NLG", "PTE", "ESP")
dt_fx[, range_euro := as.numeric(base %in% euro_legacy | foreign %in% euro_legacy)]

# Fix pre-euro currencies: blank out price, log price and log change for
# flagged pairs in every year after 1999.
dt_fx[
  datey > 1999 & range_euro == 1,
  `:=`(prc = NA_real_, log_prc = NA_real_, d1_prc = NA_real_)
]

# Base factor: average log change over all rows sharing the same base
# currency and date (missing changes ignored); computed for every base.
dt_fx[, d1_base_factor := mean(d1_prc, na.rm = TRUE), by = .(base, date)]
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# Load the PCs produced by PCA_base_new.R and attach the first six to every
# FX row by date (left join: FX rows without a PC match keep NA in PC1..PC6).
dt_PCA <- fread("./output/PCs_date.csv")
pc_keep <- c("date", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6")
dt_PCA_small <- dt_PCA[, pc_keep, with = FALSE]

dt_fx <- merge(dt_fx, dt_PCA_small, by = "date", all.x = TRUE)
# Display only: shows the rows that received a PC1 value; dt_fx is unchanged.
dt_fx[!is.na(PC1)]
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# REGRESS Delta s_ij,t on the PCs
#   For every country pair and calendar year, regress the daily log change on
#   PC1..PC6 and keep the fitted values (systematic component, d1_prc_syst)
#   and the residuals (idiosyncratic component, d1_prc_idio).
pc_cols <- paste0("PC", 1:6)
dt_fx_reg <- dt_fx[, c("date", "base_country", "foreign_country", "d1_prc",
  pc_cols), with = FALSE]
# NOTE(review): paste0() turns a missing country code into the literal "NA",
# so pairs with an unmapped currency share an isopair and are pooled into one
# regression -- confirm this is acceptable.
dt_fx_reg[, isopair := paste0(base_country, "_", foreign_country)]
# is.finite() drops NA/NaN as before and additionally +/-Inf changes, which
# lm() rejects with an error.
dt_fx_reg <- dt_fx_reg[is.finite(d1_prc) & !is.na(PC1)]
dt_fx_reg[, date_y := year(date)]
# Create the output columns up front so they exist (as NA) for rows that are
# never covered by a regression.
dt_fx_reg[, `:=`(d1_prc_syst = NA_real_, d1_prc_idio = NA_real_)]

for (date_iter in 1971:2019) {
  message("Processing year ... ", date_iter)
  # Locate this year's rows once instead of rescanning the table per pair.
  idx_year <- which(dt_fx_reg[["date_y"]] == date_iter)
  if (length(idx_year) == 0L) {
    next
  }
  # Row positions (in dt_fx_reg) of each country pair within the year.
  l_idx_pair <- split(idx_year, dt_fx_reg[["isopair"]][idx_year])
  for (idx_pair in l_idx_pair) {
    dt_reg_tmp <- dt_fx_reg[idx_pair]
    # Only PC1 is guaranteed non-missing; skip pair-years without a single
    # complete set of PCs, since lm() cannot be fitted on zero observations.
    if (!any(complete.cases(dt_reg_tmp[, pc_cols, with = FALSE]))) {
      next
    }
    # na.exclude makes fitted()/residuals() return one value per input row
    # (NA where a PC is missing), so the results stay aligned with idx_pair.
    rtmp <- lm(d1_prc ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6,
      data = dt_reg_tmp, na.action = na.exclude)
    set(dt_fx_reg, i = idx_pair, j = "d1_prc_syst",
      value = as.numeric(fitted(rtmp)))
    set(dt_fx_reg, i = idx_pair, j = "d1_prc_idio",
      value = as.numeric(residuals(rtmp)))
  }
}
dt_fx_reg
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# Collect the systematic/idiosyncratic components per pair and date, count
# the observations each pair has in each year, and export the table (the
# input for the volatility estimates of these measures).
vol_cols <- c("date", "base_country", "foreign_country", "isopair", "date_y",
  "d1_prc_syst", "d1_prc_idio")
dt_vol <- dt_fx_reg[, vol_cols, with = FALSE]

# Number of rows available for each pair-year.
dt_vol[, nobs_y := .N, by = .(date_y, isopair)]
# Frequency table of the pair-year observation counts, for the log.
dt_vol %>% tab(nobs_y)
setorder(dt_vol, isopair, date)

fwrite(dt_vol, "./output/volatility_PCA.csv")
# ---------------------------------------------------------------------


























